# August 30, 2023
# Iris Arbogast irisarbogast@gmail.com
# Callaway Sant'Anna specification, summary table for Appendix Table 5, graphs of states

# --------------------  init  -------------------------------

# clean up environment
rm(list = ls())

install.packages("tidyverse")
install.packages("cdlTools")
install.packages("ggplot2")
install.packages("lubridate")
install.packages("haven")
install.packages("ggpubr")
install.packages("gtsummary")
install.packages("did")

library(tidyverse) # data cleaning functions
library(cdlTools) #This package has fips -> state code functions
library(ggplot2) # graphs
library(lubridate) # date manipulation
library(haven) # conversion from stata
library(ggpubr) # graph themes
library(gtsummary) # summary table
library(did) # Callaway-Sant'Anna method

`%notin%` <- negate(`%in%`)

#------------ Callaway Sant'anna specification ------------------------------
# read in data and clean
data <- read_dta("../Data/Clean/Cleaned_Policy_Data.dta") %>%
  drop_na(date) %>%
  mutate(date =  make_date(year2,mo2,1)) %>%
  drop_na(stusps) %>%
  # drop time periods as described in the paper
  filter(as.numeric(year2) > 2014 | as.numeric(mo2) > 8) %>%
  filter(as.numeric(year2) < 2021) %>% # | as.numeric(mo2) < 3) %>%
  arrange(stusps, date) %>%
  # make numeric and replace NAs with 0s
  mutate(newpolicydate = as.numeric(newpolicydate),
         newdate = as.numeric(newdate),
         newpolicydate = ifelse(is.na(newpolicydate), 0, newpolicydate),
         year_daypolicydate = ifelse(is.na(year_daypolicydate), 0, year_daypolicydate))  %>%
  group_by(newpolicydate, st) %>%
  select(-c(daypolicydate, timesince)) %>%
  mutate(date2 = as.POSIXct(date,'%y/%m/%d')) %>%
  filter(stusps %notin% c("OH", "CA", "NM"))

##### Table 4: Impact of policy changes on child Medicaid and CHIP enrollment using Callaway and Sant'Anna approach
# results will vary slightly due to randomness
csrez <- att_gt(data = data, yname = "mchiprate", tname = "newdate", 
                idname = "st", gname = "newpolicydate", clustervars = c("st"), allow_unbalanced_panel = FALSE)

summary(csrez)

# different aggregations
# Panel A
simpleagg <- aggte(csrez, type = "simple", na.rm = TRUE, alp = 0.01) 
summary(simpleagg)

# Panel B
groupagg <- aggte(csrez, type = "group", na.rm = TRUE, alp = 0.01)
groupagg


# make a crosswalk to tell which group has which states
data_crosswalk <- data %>%
  mutate(statename = fips(stusps, to = "Name")) %>%
  ungroup() %>%
  select(statename, newpolicydate) %>%
  as.matrix() %>%
  as.data.frame() %>%
  dplyr::rename(Group = newpolicydate) %>%
  distinct(statename, .keep_all = TRUE) %>%
  filter(Group != " 0") %>%
  pivot_wider(names_from = Group, values_from = statename) %>%
  pivot_longer(everything(), names_to = "Group", values_to = "State") %>%
  mutate(Group = as.numeric(Group))

# clean up results to export
group_df <- data.frame(Group = groupagg$egt, Estimate = groupagg$att.egt, SE = groupagg$se.egt) %>%
  right_join(data_crosswalk, by = "Group") %>%
  mutate(SE = paste0('="(',round(SE, digits = 4), ')"'),
         Estimate = paste0('="',round(Estimate, digits = 4)),
         " "= "") %>%
  pivot_longer(cols = -c(Group, State)) %>%
  mutate(State = paste0(State)) %>%
  arrange(State) %>%
  mutate(State = ifelse(name == " " | name == "SE", " ", State))

# Added stars and panel A by hand 
write.csv(group_df, "../Output/Tables/Table4.csv")




###### CS balanced panels (similar to event study)
# results will vary slightly due to randomness

dynagg <- aggte(csrez, type = "dynamic", na.rm = TRUE, balance_e = 16)
summary(dynagg)
dynamic_callawaysantanna_a <- ggdid(dynagg, type = "dynamic", title = " ", xgap = 10)
dynamic_callawaysantanna_a

ggsave("../Output/Graphs/Figure3a.tiff", dynamic_callawaysantanna_a, device = "tiff", height = 3, width = 4)


dynagg <- aggte(csrez, type = "dynamic", na.rm = TRUE, balance_e = 17)
summary(dynagg)
dynamic_callawaysantanna_b <- ggdid(dynagg, type = "dynamic", title = " ", xgap = 10)
dynamic_callawaysantanna_b

ggsave("../Output/Graphs/Figure3b.tiff", dynamic_callawaysantanna_b, device = "tiff", height = 3, width = 4)


dynagg <- aggte(csrez, type = "dynamic", na.rm = TRUE, balance_e = 23)
summary(dynagg)
dynamic_callawaysantanna_c <- ggdid(dynagg, type = "dynamic",  title = " ", xgap = 10,)
dynamic_callawaysantanna_c

ggsave("../Output/Graphs/Figure3c.tiff", dynamic_callawaysantanna_c, device = "tiff", height = 3, width = 4)


#--------------------------- Summary Table for ACS Demographics ----------------------------
data <- read_dta("../Data/Clean/Cleaned_Policy_Data.dta") %>%
  drop_na(date) %>%
  # rename date variable for R (from Stata)
  mutate(date =  make_date(year2,mo2,1)) %>%
  drop_na(stusps) %>%
  filter(as.numeric(year2) < 2021) %>% # 
  arrange(stusps, date) %>%
  mutate(date2 = as.POSIXct(date,'%y/%m/%d')) %>%
  group_by(stusps) %>%
  mutate(mchiprate_lag = dplyr::lag(mchiprate, 6))

# create datasets for each time period and then separately for only children with Medicaid
ACSData_2019 <- read_dta("../Data/Clean/CleanedACSData_2019.dta") %>%
  filter(age < 19) %>%
  mutate(groupvar = "All_2019")

ACSData_2019_mc <- ACSData_2019 %>%
  filter(Medicaid_covered == 1) %>%
  mutate(groupvar = "MC_2019")

ACSData_2010 <- read_dta("../Data/Clean/CleanedACSData_2010.dta") %>%
  filter(age < 19) %>%
  mutate(groupvar = "All_2010")

ACSData_2010_mc <- ACSData_2010 %>%
  filter(Medicaid_covered == 1) %>%
  mutate(groupvar = "MC_2010")

ACSData_2016 <- read_dta("../Data/Clean/CleanedACSData_2016.dta") %>%
  filter(age < 19) %>%
  mutate(groupvar = "All_2016")

ACSData_2016_mc <- ACSData_2016 %>%
  filter(Medicaid_covered == 1) %>%
  mutate(groupvar = "MC_2016")


# bind the datasets together for the table
ACSData <- rbind(ACSData_2010, ACSData_2010_mc, ACSData_2016, ACSData_2016_mc, ACSData_2019, ACSData_2019_mc) %>%
  mutate(groupvar = factor(groupvar, levels =  c("All_2010", "MC_2010", "All_2016", "MC_2016", "All_2019", "MC_2019")))

# create table using package gtsummary
results_shares <-
  survey::svydesign(~ 1, data = ACSData, weights = ~ perwt) %>%
  tbl_svysummary(
    include = c(Medicaid, AnyCoverage, female, male, white, black, raceother, hispanic, nothispanic, fpl100,
                fpl200, fpl2to400, fpl400, noyr4clgp, yr4clgp,
                noncitp, citp, engtbp, engnotbp, infants,
                Ageoneto5, Agesixto12, Age13up, no18yr),
    by = groupvar,
    statistic = list(all_categorical() ~ "{p}%"), 
    missing = "no",
   list(Medicaid~ "Medicaid", AnyCoverage ~ "Any Coverage", female ~ "Female", male ~ "Male", white~ "White", black~ "Black", raceother~ "Other Race", hispanic~ "Hispanic", nothispanic~ "Not Hispanic", fpl100~ "<100% FPL",
        fpl200 ~ "100-200% FPL", fpl2to400~ "200-400% FPL", fpl400~ ">400% FPL", noyr4clgp~ "No Parent with 4-yr College Education", yr4clgp~ "Parent with 4-yr College Education",
        noncitp ~ "Noncitizen Parent", citp~ "Citizen Parents", engtbp~ "Parent with Weak English", engnotbp~ "Parents with No Weak English", infants~ "Infants",
        Ageoneto5~ "Age 1 - 5", Agesixto12~ "Age 6 - 12", Age13up ~ "Age 13+", no18yr~ "Age 0 - 17")
  )

results_shares

results_shares %>%
  as_gt() %>%
  gt::gtsave(filename = "../Output/Tables/Ap_Table5_main.docx")

# for bottom row of table, number of person-years
results_obs <-
  survey::svydesign(~ 1, data = ACSData, weights = ~ perwt) %>%
  tbl_svysummary(
    by = groupvar,
    include = c(all),
    statistic = list(all_categorical() ~ "{n_unweighted}"),
    missing = "no", 
    list(all ~ "All"))
results_obs

results_obs %>%
  as_gt() %>%
  gt::gtsave(filename = "../Output/Tables/Ap_Table5_lastrow.docx")

# --------------- Create single graphs for no policy states ----------
data <- read_dta("../Data/Clean/Cleaned_Policy_Data.dta") %>%
  drop_na(date) %>%
  # rename date variable for R (from Stata)
  mutate(date =  make_date(year2,mo2,1)) %>%
  drop_na(stusps) %>%
  filter(as.numeric(year2) < 2021) %>% # | as.numeric(mo2) < 3) %>%
  arrange(stusps, date) %>%
  mutate(date2 = as.POSIXct(date,'%y/%m/%d')) %>%
  group_by(stusps) %>%
  mutate(mchiprate_lag = dplyr::lag(mchiprate, 6))

# function to scale labels
scaleFUN <- function(x) sprintf("%.2f", x)

medicaid_over_time_NY <- ggplot(data = subset(data,stusps == "NY"), aes(x=as.Date(date),y=mchiprate)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("New York") +
  ylab("") + xlab("") + 
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c( "darkgreen")) +
  scale_linetype_manual('', values=c("dotdash")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  xlim(as.Date("2014-01-01"), as.Date("2021-01-01")) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31")))

medicaid_over_time_NY


medicaid_over_time_NJ <- ggplot(data = subset(data,stusps == "NJ"), aes(x=as.Date(date),y=mchiprate)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_rect(aes(xmin=as.Date("2014-01-01"), xmax=as.Date("2015-01-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("New Jersey") +
  ylab("") + xlab("") + 
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c( "darkgreen", "magenta")) +
  scale_linetype_manual('', values=c("dotdash", "longdash")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)

medicaid_over_time_NJ




medicaid_over_time_VA <- ggplot(data = subset(data,stusps == "VA"), aes(x=as.Date(date),y=mchiprate)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Virginia") +
  ylab("") + xlab("") + 
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  #guide = guide_legend(override.aes = list(alpha = 1)),
  #guide = guide_legend(title = NULL)) +
  scale_color_manual('', values=c( "darkgreen", "magenta")) +
  scale_linetype_manual('', values=c("dotdash", "longdash")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2))+ 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)

medicaid_over_time_VA



# Pennsylvania
medicaid_over_time_PA <- ggplot(data = subset(data,stusps == "PA"), aes(x=as.Date(date),y=mchiprate)) +
  geom_rect(aes(xmin=as.Date("2014-01-01"), xmax=as.Date("2015-01-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Pennsylvania") +
  ylab("") + xlab("") +
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c("darkgreen", "darkblue")) +
  scale_linetype_manual('', values=c("dotdash", "dotted")) +
  guides(fill = guide_legend(order = 1),
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2))+ 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31")))

medicaid_over_time_PA

#---------------------------  Create single graphs for policy states ----------------------------

data <- data %>%
  filter(year >= 2014)

# TN
medicaid_over_time_TN <- ggplot(data = subset(data,stusps == "TN"), aes(x=as.Date(date),y=mchiprate)) +
  geom_vline(aes(xintercept = as.Date("2016-11-01"),
                 color="Increase in Administrative Burden", linetype = "Increase in Administrative Burden"), size=1) +
  geom_vline(aes(xintercept = as.Date("2019-02-01"),
                 color="End of Increase in Administrative Burden", linetype = "End of Increase in Administrative Burden"), size=1) +
  geom_rect(aes(xmin=as.Date("2014-01-01"), xmax=as.Date("2015-01-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Tennessee") +
  ylab("") + xlab("") +
  scale_color_manual('', values=c( "black", "black")) +
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_linetype_manual('', values=c("dotdash", "solid")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)
medicaid_over_time_TN


#MO
medicaid_over_time_MO <- ggplot(data = subset(data,stusps == "MO"), aes(x=as.Date(date),y=mchiprate)) +
  geom_vline(aes(xintercept = as.Date("2018-01-01"),
                 color="Increase in Administrative Burden", linetype = "Increase in Administrative Burden"), size=1) +
  geom_vline(aes(xintercept = as.Date("2022-02-01"),
                 color="End of Increase in Administrative Burden", linetype = "End of Increase in Administrative Burden"), size=1) +
  geom_rect(aes(xmin=as.Date("2014-01-01"), xmax=as.Date("2015-01-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Missouri") +
  ylab("") + xlab("") +
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c("black", "black")) +
  scale_linetype_manual('', values=c("dotted", "solid")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)

medicaid_over_time_MO




#FL
medicaid_over_time_FL <- ggplot(data = subset(data,stusps == "FL"), aes(x=as.Date(date),y=mchiprate)) +
  geom_vline(aes(xintercept = as.Date("2017-07-01"),
                 color="Increase in Administrative Burden", linetype = "Increase in Administrative Burden"), size=1) +
  geom_vline(aes(xintercept = as.Date("2022-02-01"),
                 color="End of Increase in Administrative Burden", linetype = "End of Increase in Administrative Burden"), size=1) +
  geom_rect(aes(xmin=as.Date("2014-01-01"), xmax=as.Date("2015-01-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Florida") +
  ylab("") + xlab("") +
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c("green", "black")) +
  scale_linetype_manual('', values=c("dashed", "solid")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)

medicaid_over_time_FL





#Colorado
medicaid_over_time_CO <- ggplot(data = subset(data,stusps == "CO"), aes(x=as.Date(date),y=mchiprate)) +
  geom_vline(aes(xintercept = as.Date("2017-03-01"),
                 color="Increase in Administrative Burden", linetype = "Increase in Administrative Burden"), size=1) +
  geom_vline(aes(xintercept = as.Date("2022-02-01"),
                 color="End of Increase in Administrative Burden", linetype = "End of Increase in Administrative Burden"), size=1) +
  geom_rect(aes(xmin=as.Date("2020-03-01"), xmax=as.Date("2020-12-01"), fill = "Redetermination Pause", ymin=-Inf, ymax=+Inf)) +
  geom_line(lwd=1.1)+ theme_pubr() + ggtitle("Colorado") +
  ylab("") + xlab("") +
  scale_fill_manual('Policy Change',
                    values = c("lightgray")) +
  scale_color_manual('', values=c("blue","black")) +
  scale_linetype_manual('', values=c("dashed","solid")) +
  guides(fill = guide_legend(order = 1), 
         colour = guide_legend(order = 2),
         linetype = guide_legend(order = 2)) + 
  scale_x_date(limits = as.Date(c("2014-01-01", "2020-12-31"))) + scale_y_continuous(labels=scaleFUN)

medicaid_over_time_CO




# -------- Combine and export graphs --------------

nopolicy_states <- ggarrange(medicaid_over_time_VA, medicaid_over_time_NY, medicaid_over_time_PA, medicaid_over_time_NJ, 
                             ncol = 2, nrow = 2, common.legend = TRUE, legend = "none") + theme(plot.margin = margin(.8,0,0,0, "cm"))
nopolicy_states

ggsave("../Output/Graphs/fig1b.tiff", nopolicy_states, device = "tiff", height = 7, width = 10)



policy_states <- ggarrange(medicaid_over_time_MO,medicaid_over_time_CO,medicaid_over_time_FL, medicaid_over_time_TN, 
          ncol = 2, nrow = 2, common.legend = TRUE, legend = "top") + theme(plot.margin = margin(.8,0,0,0, "cm"))

policy_states

ggsave("../Output/Graphs/fig1a.tiff", policy_states, device = "tiff", height = 7.5, width = 10)


